* Prepare data for PyTwoWay package (geotwfe03) 

cap noisily program drop prep_pytwoway twfe_baseline card_vardecomp
program prep_pytwoway
	
	* Load data and restrict to twfe-sample
	use id year cz90 age logptotinc mover_type using  "${intermediate_data}/twfe/`1'_movers.dta", clear
	keep if mover_type == "Single Movers" & inrange(age, 40,55) // keep single movers aged 40-55
	
	* Generate CZ1990 id
	ren cz90 j // "firm id" here "cz id"
		
	* Rename and label variables
	gen y =  logptotinc
	ren id i // worker id
	ren year t
	
	* Restrict to relevant varibles
	keep y i j t 
		
	compress
	save "${intermediate_data}/twfe/geotwfe03-`1'_movers.dta", replace

end

* Run manual regressions without controls to use for 
* comparison with the python package output

program twfe_baseline

	* Load data and restrict to twfe-sample
	use id year cz90 age logptotinc mover_type using  "${intermediate_data}/twfe/`1'_movers.dta", clear
	keep if mover_type == "Single Movers" & inrange(age, 40,55) // keep single movers aged 40-55
	xtset id year 
	
	* Estimation 
	reghdfe logptotinc, a(cz90 id, savefe) resid
	local N_UR = e(N)
	keep if e(sample)
			
	keep logptotinc cz90 id year __hdfe* _reghdfe_resid
	rename (__hdfe1__ __hdfe2__) (__hdfePLACE__ __hdfeINDIVID__)
	gen N_UR = `N_UR'
	save "${temp}/IndividFixedEffects_`1'", replace

	gcollapse logptotinc __hdfe*__ N_UR, by(cz90)
	save "${temp}/CZFixedEffects_`1'", replace
		
end

* Variance decomposition

program card_vardecomp
	use "${temp}/IndividFixedEffects_`1'", clear
 
	local N_UR = `=N_UR[1]'
 
	rename (__hdfePLACE__ __hdfeINDIVID__) (__hdfe1__ __hdfe2__) 
	
	preserve
		foreach y in 1 2 {
			cap confirm variable __hdfe`y'__
			if !_rc {
				qui sum __hdfe`y'__
				gen sd_hdfe`y' = `r(sd)'
				gen CoVar_hdfe`y' = `r(Var)'

				foreach z in 1 2 {
					cap confirm variable __hdfe`z'__
					if !_rc & `y' < `z' {
						qui corr __hdfe`y'__ __hdfe`z'__
						gen  Corr_hdfe`y'`z' = r(C)[1,2]

						qui corr __hdfe`y'__ __hdfe`z'__, cov
						gen CoVar_hdfe`y'`z' = `r(cov_12)'
					}
				}
			}
		}

		* Variance of the residual
		qui sum _reghdfe_resid 
		gen CoVar_resid = `r(Var)'

		gcollapse (sd) sd_logptotinc = logptotinc (first) sd_* Corr_* CoVar_*

		gen CoVar_logptotinc = sd_logptotinc * sd_logptotinc

		gen CoVar_recomposed =	CoVar_hdfe1 + CoVar_hdfe2 + 2*CoVar_hdfe12 + CoVar_resid
		
		gen sd_recomposed = sqrt(CoVar_recomposed)
		gen spec = "Person-Year Level"
		gen sample = "`1'" 
		save "${temp}/_personyear_`1'", replace
	restore

	gcollapse logptotinc __hdfe* _reghdfe_resid, by(cz90)

	foreach y in 1 2 {
		cap confirm variable __hdfe`y'__
		if !_rc {
			qui sum __hdfe`y'__, d
			gen sd_hdfe`y' = `r(sd)'
			gen CoVar_hdfe`y' = `r(Var)'

			foreach z in 1 2 {
				cap confirm variable __hdfe`z'__
				if !_rc & `y' < `z' {
					qui corr __hdfe`y'__ __hdfe`z'__
					gen  Corr_hdfe`y'`z' = r(C)[1,2]

					qui corr __hdfe`y'__ __hdfe`z'__, cov
					gen CoVar_hdfe`y'`z' = `r(cov_12)'
				}
			}
		}
	}

	* Variance of the residual
	qui sum _reghdfe_resid 
	gen CoVar_resid = `r(Var)'

	gcollapse (sd) sd_logptotinc = logptotinc (first) sd_* Corr_* CoVar_*

	gen CoVar_logptotinc = sd_logptotinc * sd_logptotinc

	gen CoVar_recomposed =	CoVar_hdfe1 + CoVar_hdfe2 + 2*CoVar_hdfe12 + CoVar_resid

	gen sd_recomposed = sqrt(CoVar_recomposed)

	gen spec = "CZ Level"
	gen sample = "`1'"
	append using "${temp}/_personyear_`1'"

	reshape long sd_ CoVar_ Corr_, i(spec) j(effect) string
	
	replace effect = subinstr(effect, "12", "Place/Individ", .)
	replace effect = subinstr(effect, "1", "Place", .)
	replace effect = subinstr(effect, "2", "Individ", .)
	replace effect = subinstr(effect, "hdfe", "", .)
		
	gen id =.
	replace id = 1 if effect == "logptotinc"
	replace id = 2 if effect == "recomposed"
	replace id = 3 if effect == "Place"
	replace id = 4 if effect == "Individ"
	replace id = 5 if effect == "Place/Individ"
		
	bys spec: gen temp = CoVar_ if effect == "recomposed"
	egen temp2 = max(temp), by(spec)
	by spec: gen Var_share = CoVar_ / temp2
	
	gen N_UR = `N_UR'
	
	order sample spec, first 
	gsort sample -spec id
	drop id temp temp2
	order N_UR, last
	
	save "${temp}/card_`1'", replace
	
end

* Run mover-bias robustness checks

foreach sample in physicians lawyers {
	prep_pytwoway "`sample'"
	twfe_baseline "`sample'"
	card_vardecomp "`sample'"
}
	
* Combine all estimates into one table
use "${temp}/card_physicians.dta", clear
	append using "${temp}/card_lawyers.dta"
	save "${output}/twfe/geo_twfe/geotwfe06-earnings_decomp_card_nocontrols.dta", replace
	
